rm(list=ls())
library(foreign)
library(survival)
library(Amelia)
library(dplyr)
library(lubridate)
set.seed(940)

#Clean the raw data and perform multiple imputation

#set working directory
dat <- read.csv('raw_ts.csv')
dat <- dat[order(dat$Country, dat$Year), ]
dat <- subset(dat, Year > 1880 & Year <= 2015)
length(unique(dat$cus_caseid[dat$cus_caseid!='']))

#Fill in missing values of e_regionpol
e_regionpol_fill <- dat %>% 
  group_by(Country) %>% 
  dplyr::summarize(
    e_regionpol_fill = mean(e_regionpol, na.rm=T)
  )
dat <- merge(dat, e_regionpol_fill, by = 'Country', all.x = T)
dat$e_regionpol <- dat$e_regionpol_fill

#Transform variables
#Oil
dat$ln.oil_gas_valuePOP_2014 <- log(dat$oil_gas_valuePOP_2014 + 1)
dat$ln.oil_gas_valuePOP_2014L1 <- log(dat$oil_gas_valuePOP_2014L1 + 1)
dat$ln.oil_gas_valuePOP_2014L2 <- log(dat$oil_gas_valuePOP_2014L2 + 1)

#Population
dat$ln.tpop <- log(dat$tpop + 1)
dat$ln.tpopL1 <- log(dat$tpopL1 + 1)
dat$ln.tpopL2 <- log(dat$tpopL2 + 1)

#Military personnel per capita
dat$milperpc <- dat$milper/dat$tpop
dat$ln.milperpc <- log(dat$milperpc)

#Create decade variable
dat$decadeYear <- round((dat$Year - 1900)/10)

###############################################
#Multiple Imputation: One and two lags
###############################################
m <- 20

#---------------------------------------------------------
#Time-series - Multiple Imputation (one lag)
#---------------------------------------------------------
imputation_vars_ts <- c('cus_sr', 'partB', 'mil', 'mon', 'persB', 'sv_comm', 'InstalledCom', 'ln.tpopL1', 'ln.oil_gas_valuePOP_2014L1' , 'e_migdppclnL1' , 'e_migdpgroL1', 'polity2', 'decadeYear')
d_mi_ts <- amelia(dat[, c(imputation_vars_ts, 'Year', 'ccode', 'cyear')], ts = 'Year', cs = 'ccode', idvars = 'cyear', m = m, p2s = 1)
d_mi_ts$imputations <- lapply(d_mi_ts$imputations, function(x){
  x[['Country']] <- dat[['Country']]
  x[['cus_ARevnum']] <- dat[['cus_ARevnum']]
  x[['e_regionpol']] <-  dat[['e_regionpol']]
  x[['cus_fail']] <- dat[['cus_fail']]
  x[['cus_t']] <- dat[['cus_t']]
  x[['cus_tplus1']] <- dat[['cus_t']] +1
  x[['cus_AR']] <- dat[['cus_AR']]
  x[['cus_caseid']] <- dat[['cus_caseid']]
  x[['cus_sr']] <- dat[['cus_sr']]
  x[['cus_syear']] <- dat[['cus_syear']]
  x[['cus_eyear']] <- dat[['cus_eyear']]
  x[['cus_caseid']] <- as.character(dat[['cus_caseid']])
  return(x)}
)

#---------------------------------------------------------
#Spells - Multiple Imputation (two lags because regime begin one calendar year after power seizure)
#---------------------------------------------------------
imputation_vars_spells <- c('cus_sr', 'partB', 'mil', 'mon', 'persB', 'sv_comm' , 'InstalledCom', 'ln.tpopL2', 'ln.oil_gas_valuePOP_2014L2', 'warL2', 'e_migdppclnL2' , 'e_migdpgroL2', 'polity2', 'decadeYear')
d_mi_spells_t <- amelia(dat[, c(imputation_vars_spells, 'Year', 'ccode', 'cyear')], ts = 'Year', cs = 'ccode', idvars = 'cyear', m = m, p2s = 1)

#Main dataset
d_mi_spells_cus <- d_mi_spells_t
d_mi_spells_cus$imputations <- lapply(d_mi_spells_cus$imputations, function(x){
  x[['Country']] <- dat[['Country']]
  x[['cus_ARevnum']] <- dat[['cus_ARevnum']]
  x[['e_regionpol']] <-  dat[['e_regionpol']]
  x[['cus_fail']] <- dat[['cus_fail']]
  x[['cus_t']] <- dat[['cus_t']]
  x[['cus_AR']] <- dat[['cus_AR']]
  x[['cus_caseid']] <- dat[['cus_caseid']]
  x[['cus_sr']] <- dat[['cus_sr']]
  x[['cus_syear']] <- dat[['cus_syear']]
  x[['cus_eyear']] <- dat[['cus_eyear']]
  x[['cus_t_surv']] <- dat[['cus_t_surv']]
  
  x.out <- subset(x,  cus_caseid != '') %>%
    dplyr::group_by(cus_caseid) %>%
    dplyr::mutate(cus_fail = max(cus_fail))%>%
    dplyr::filter(cus_t == min(cus_t) & cus_AR %in% 1) %>%
    data.frame() 
  return(x.out) }
)

#GWF regimes
d_mi_spells_gwf <- d_mi_spells_t

d_mi_spells_gwf$imputations <- lapply(d_mi_spells_gwf$imputations, function(x){
  x[['Country']] <- dat[['Country']]
  x[['e_regionpol']] <-  dat[['e_regionpol']]
  x[['gwf_fail']] <- dat[['gwf_fail']]
  x[['gwf_t']] <- dat[['gwf_t']]
  x[['gwf_AR']] <- dat[['gwf_AR']]
  x[['gwf_caseid']] <- dat[['gwf_caseid']]
  x[['gwf_sr']] <- dat[['gwf_sr']]
  x[['gwf_syear']] <- dat[['gwf_syear']]
  x[['gwf_eyear']] <- dat[['gwf_eyear']]
  x[['gwf_spell']] <- dat[['gwf_spell']]
  x.out <- x %>%
    dplyr::group_by(gwf_caseid) %>%
    dplyr::mutate(gwf_fail = max(gwf_fail, na.rm=T),
           gwf_t_surv = gwf_spell)%>%
    dplyr::filter(gwf_t %in% min(gwf_t) & gwf_AR %in% 1) %>%
    data.frame()
  return(x.out) }
)

#Svolik ruling coalitions
#Add <1 year ruling coalitions
svol.orig <- read.csv('ruling coalitions in dictatorships, 1946-2008.csv')#load data
svol.orig$svol_fail <- svol.orig$rc_c
svol.orig$syear <- year(as.Date(svol.orig$rc_start, '%d%b%Y'))
svol.orig$smonth <- month(as.Date(svol.orig$rc_start, '%d%b%Y'))
svol.orig$sday <- day(as.Date(svol.orig$rc_start, '%d%b%Y'))
svol.orig$eyear <- year(as.Date(svol.orig$rc_end, '%d%b%Y'))
svol.orig$svol_t_surv <- svol.orig$eyear - svol.orig$syear
svol.orig$cyear <- apply(svol.orig[, c('ccode', 'syear')], 1, function(x) paste(x[1], x[2], sep = '-'))
svol.orig$svol_caseid_date <- apply(svol.orig[, c('cabb', 'syear', 'smonth', 'sday', 'eyear')], 1, function(x)paste(x[1], ' ', x[2], '.', x[3], '.', x[4], '-', x[5], sep = ''))
svol.orig$svol_caseid <- apply(svol.orig[, c('cabb', 'syear', 'eyear')], 1, function(x)paste(x[1], ' ', x[2], '-', x[3], sep = ''))
max(table(svol.orig$svol_caseid))
svol_sr <- c(
  'AFG 1996. 9.27-2001',
  'ALB 1944.11.29-1992',
  'ALG 1962. 7. 3-1992',
  'ANG 1979. 9.10-2008',
  'BOL 1952. 4.11-1969',
  'CAM 1975. 4.17-1979',
  'CHN 1949.10. 1-2008',
  'CUB 1959. 1. 2-2008',
  'ERI 1993. 5.24-2008',
  'GNB 1974. 9.10-1999',
  'IRN 1979. 2. 1-2008',
  'MEX 1940.12. 1-2000',
  'MZM 1975. 6.25-2008',
  'NIC 1979. 7.18-1990',
  'RWA 1994. 7.19-2008',
  'RUS 1923. 3.10-1991',
  'DRV 1945. 9. 2-2008',
  'YUG 1945. 3. 6-2000')
svol.orig$svol_sr <- ifelse(svol.orig$svol_caseid_date %in% svol_sr, 1, 0); sum(svol.orig$svol_sr)
svol.orig <- svol.orig[, c('ccode', 'syear', 'svol_caseid_date', 'svol_caseid', 'cyear', 'svol_fail', 'svol_sr', 'svol_t_surv')]

d_mi_spells_svol <- list()
d_mi_spells_svol$imputations <-  vector('list', 20)
names(d_mi_spells_svol$imputations) <- paste('imp.', 1:20, sep = '')
d_mi_spells_svol$m <-  20

#Note the start year is the calendar year in which the coalition begins
d_mi_spells_svol$imputations <- lapply(d_mi_ts$imputations, function(x){
  x.out <- merge(svol.orig, x[, c('ln.tpopL1', 'ln.oil_gas_valuePOP_2014L1' , 'e_migdppclnL1' , 'e_migdpgroL1', 'cyear')], by.x = 'cyear', by.y = 'cyear', all.x = T)
  return(x.out) 
}
)
svol.orig$cyear[!(svol.orig$cyear %in% d_mi_ts$imputations[[1]]$cyear)]

#MCM regimes
d_mi_spells_mcm <- d_mi_spells_t
d_mi_spells_mcm$imputations <- lapply(d_mi_spells_mcm$imputations, function(x){
  x[['Country']] <- dat[['Country']]
  x[['e_regionpol']] <-  dat[['e_regionpol']]
  x[['mcm_fail']] <- dat[['mcm_fail']]
  x[['mcm_t']] <- dat[['mcm_t']]
  x[['mcm_AR']] <- dat[['mcm_AR']]
  x[['mcm_caseid']] <- as.character(dat[['mcm_caseid']])
  x[['mcm_sr']] <- dat[['mcm_sr']]
  x[['mcm_syear']] <- dat[['mcm_syear']]
  x[['mcm_eyear']] <- dat[['mcm_eyear']]
  x.out <- subset(x, mcm_AR %in% 1) %>%
    dplyr::group_by(mcm_caseid) %>%
    dplyr::mutate(mcm_fail = max(mcm_fail),
           #mcm_eyear = max(Year),
           mcm_t_surv = mcm_eyear[1]- mcm_syear[1]
           ) %>%
    dplyr::filter(mcm_t == min(mcm_t) ) %>%
    data.frame() 
  return(x.out) 
  }
)

#N20 regimes
d_mi_spells_N20 <- d_mi_spells_t
d_mi_spells_N20$imputations <- lapply(d_mi_spells_N20$imputations, function(x){
  x[['Country']] <- dat[['Country']]
  x[['cus_ARevnum']] <- dat[['cus_ARevnum']]
  x[['e_regionpol']] <-  dat[['e_regionpol']]
  x[['cus_fail']] <- dat[['cus_fail']]
  x[['cus_t']] <- dat[['cus_t']]
  x[['cus_t_surv']] <- dat[['cus_t_surv']]
  x[['cus_AR']] <- dat[['cus_AR']]
  x[['cus_caseid']] <- dat[['cus_caseid']]
  x[['cus_sr']] <- dat[['cus_sr']]
  x[['cus_syear']] <- dat[['cus_syear']]
  x[['cus_eyear']] <- dat[['cus_eyear']]
  x[['cus_caseid']] <- as.character(dat[['cus_caseid']])
  
  #Extract the observation for 1918 (one year before regime), this means take 1920 instead of 1919
  hungary_slice <- subset(x, Year == 1920 & ccode == 310)
  hungary_slice$cus_caseid <- 'Hungary 1919-1919'
  hungary_slice$cus_AR <- 1
  hungary_slice$cus_syear <- 1919
  hungary_slice$cus_eyear <- 1919
  hungary_slice$partB <- 1
  hungary_slice$mil <- 0
  hungary_slice$mon <- 0
  hungary_slice$persB <- 0
  hungary_slice$sv_comm <- 1
  hungary_slice$InstalledCom <- 0
  hungary_slice$cus_t_surv <- 0
  hungary_slice$cus_t <- 0
  hungary_slice$cus_fail <- 1
  hungary_slice$cus_sr <- 1
  x[x$cus_caseid == 'Hungary 1919-1944', 'cus_ARevnum'] <- 2
  x[x$cus_caseid == 'Hungary 1947-1990', 'cus_ARevnum'] <- 3
  
  #Extract the observation for 1917 (one year before regime), this means take 1919 instead of 1918
  finland_slice <- subset(x, Year == 1919 & ccode == 375)
  finland_slice$cus_caseid <- 'Finland 1918-1918'
  finland_slice$cus_AR <- 1
  finland_slice$cus_ARevnum <- 0
  finland_slice$cus_syear <- 1918
  finland_slice$cus_eyear <- 1918
  finland_slice$partB <- 1
  finland_slice$mil <- 0
  finland_slice$mon <- 0
  finland_slice$persB <- 0
  finland_slice$sv_comm <- 1
  finland_slice$InstalledCom <- 1
  finland_slice$cus_t_surv <- 0
  finland_slice$cus_t <- 0
  finland_slice$cus_fail <- 1
  finland_slice$cus_sr <- 1
  names(hungary_slice)[!(names(hungary_slice)%in%names(x))]
  names(x)[!(names(x)%in%names(hungary_slice))]
  x <- rbind(x, hungary_slice, finland_slice)
  x <- x[order(x$Country, x$Year), ]
  x.out <- x %>%
    dplyr::group_by(cus_caseid) %>%
    dplyr::mutate(cus_fail = max(cus_fail))%>%
    dplyr::filter(cus_t == min(cus_t) & cus_AR == 1) %>%
    data.frame() 
  return(x.out) }
)

#---------------------------------------------------------
#Mechanisms
#---------------------------------------------------------
#Remove year of regime failure to avoid contribution of post-failure measurements
rmv_fail_years <- ifelse(dat$cus_fail == 1, NA, 1)

#Civil society strength
dat$v2xcs_ccsi_keep <- dat$v2xcs_ccsi * rmv_fail_years

#Military size
dat$milperpc_keep <- dat$milperpc * rmv_fail_years

#Party contol over the military
#Note that partymilit2 == 1 means there is no party
dat$partymilit_keep <- ifelse(dat$partymilit2 %in% 0, dat$partymilit, NA)

cus_spells <- subset(dat, cus_AR %in% 1) %>%
  dplyr::group_by(cus_caseid) %>%
  dplyr::mutate(
    v2xcs_ccsi_avg = mean(v2xcs_ccsi_keep, na.rm=T),
    milperpc_avg = mean(milperpc_keep, na.rm=T),
    partymilit_avg = mean(partymilit_keep, na.rm=T),
    prev_demo = mean(prev_demo),
    prev_partB = mean(prev_partB),
    prev_persB = mean(prev_persB),
    prev_mil = mean(prev_mil),
    prev_mon = mean(prev_mon),
    cus_fail = max(cus_fail, na.rm=T)
    )%>%
  dplyr::filter(cus_t == min(cus_t) & cus_AR %in% 1 ) %>%
  data.frame() 

#milperpc_avg is highly skewed, take the log
cus_spells$ln.milperpc_avg <- log(cus_spells$milperpc_avg)

#Coup attempts: calculate average coup rate within each regime
# First, format dates
dat$sdate_censored <- apply(dat[, c('cus_syear','cus_smonth', 'cus_sday')], 1, function(x) paste(x[1], x[2], x[3], sep = '-'))
dat$sdate_censored[dat$sdate_censored == 'NA-NA-NA'] <-NA
dat$sdate_censored <- as.Date(dat$sdate_censored)
dat$sdate_censored[dat$sdate_censored < '1950-01-01'] <- as.Date('1950-01-01')
dat$edate_censored <- apply(dat[, c('cus_eyear','cus_emonth', 'cus_eday')], 1, function(x) paste(x[1], x[2], x[3], sep = '-'))
dat$edate_censored[dat$edate_censored == 'NA-NA-NA'] <-NA
dat$edate_censored <- as.Date(dat$edate_censored)
dat$edate_censored[!is.na(dat$sdate) & is.na(dat$edate_censored)] <- as.Date('2014-12-31')

dat$date1 <- as.Date(dat$date1, '%d%b%Y');dat$date1L1 <- as.Date(dat$date1L1, '%d%b%Y')
dat$date2 <- as.Date(dat$date2, '%d%b%Y');dat$date2L1 <- as.Date(dat$date2L1, '%d%b%Y')
dat$date3 <- as.Date(dat$date3, '%d%b%Y');dat$date3L1 <- as.Date(dat$date3L1, '%d%b%Y')
dat$date4 <- as.Date(dat$date4, '%d%b%Y');dat$date4L1 <- as.Date(dat$date4L1, '%d%b%Y')

# Calculate sum of coups for Year > syear
library(lubridate)
coup_avgs <- subset(dat, Year %in% 1950:2014) %>% 
  group_by(cus_caseid) %>% 
  dplyr::summarize(
    coup.within.1 = sum(date1 > sdate_censored & date1 <= edate_censored+1, na.rm=T),#+/- 1 day buffer for coup dates 
    coup.within.2 = sum(date2 > sdate_censored & date2 <= edate_censored+1, na.rm=T),
    coup.within.3 = sum(date3 > sdate_censored & date3 <= edate_censored+1, na.rm=T),
    coup.within.4 = sum(date4 > sdate_censored & date4 <= edate_censored+1, na.rm=T),
    coups_sum = coup.within.1+coup.within.2+coup.within.3+coup.within.4,
    coups_avg = mean(coups_sum, na.rm=T),
    cus_sr = cus_sr[1],
    t = as.numeric(edate_censored[1]-sdate_censored[1], 'days')#,
  ) %>%
  filter(cus_caseid != '')
coup_avgs[c('cus_caseid', 'coups_sum', 't')]

#Resolve coups attempts that occur on Year == syear (year of regime birth)
reg_births <- subset(dat, cus_syear >= 1950) %>% 
  group_by(cus_caseid)  %>% 
  dplyr::summarize(
    cus_syear = cus_syear[1],
    date1L1 = date1L1[1],
    date2L1 = date2L1[1],
    date3L1 = date3L1[1],
    date4L1 = date4L1[1],
    coup1L1 = coup1L1[1],
    coup2L1 = coup2L1[1],
    coup3L1 = coup3L1[1],
    coup4L1 = coup4L1[1],
    Year = min(Year)-1,
    sdate_censored = sdate_censored[1],
    edate_censored = edate_censored[1]) %>% 
  filter(!is.na(date1L1) )

subset(reg_births, sdate_censored < date1L1 )[, c('cus_caseid', 'sdate_censored', 'date1L1', 'date2L1', 'date3L1', 'coup1L1', 'coup2L1', 'coup3L1')]
coup_avgs[coup_avgs$cus_caseid=='Gambia 1994-', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Gambia 1994-', 'coups_sum'] + 1
coup_avgs[coup_avgs$cus_caseid=='Mozambique 1975-', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Mozambique 1975-', 'coups_sum'] + 1
coup_avgs[coup_avgs$cus_caseid=='Nigeria 1993-1999', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Nigeria 1993-1999', 'coups_sum'] + 1
coup_avgs[coup_avgs$cus_caseid=='Peru 1992-2000', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Peru 1992-2000', 'coups_sum'] + 1
coup_avgs[coup_avgs$cus_caseid=='Sierra Leone 1992-1996', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Sierra Leone 1992-1996', 'coups_sum'] + 1
coup_avgs[coup_avgs$cus_caseid=='Yemen 1978-', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Yemen 1978-', 'coups_sum'] + 1

subset(reg_births, sdate_censored >= date1L1& sdate_censored < date2L1 )[, c('cus_caseid', 'sdate_censored', 'date1L1', 'date2L1', 'date3L1', 'coup1L1', 'coup2L1', 'coup3L1')]
coup_avgs[coup_avgs$cus_caseid=='Burundi 1966-1987', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Burundi 1966-1987', 'coups_sum'] + 1
coup_avgs[coup_avgs$cus_caseid=='Greece 1967-1974', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Greece 1967-1974', 'coups_sum'] + 1
coup_avgs[coup_avgs$cus_caseid=='Haiti 1988-1990', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Haiti 1988-1990', 'coups_sum'] + 1
coup_avgs[coup_avgs$cus_caseid=='Iraq 1963-1968', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Iraq 1963-1968', 'coups_sum'] + 1
coup_avgs[coup_avgs$cus_caseid=='Mali 2012-2013', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Mali 2012-2013', 'coups_sum'] + 1
coup_avgs[coup_avgs$cus_caseid=='Syria 1963-', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Syria 1963-', 'coups_sum'] + 1

subset(reg_births, sdate_censored >= date1L1 & sdate_censored >= date2L1 & sdate_censored < date3L1 )[, c('cus_caseid', 'sdate_censored', 'date1L1', 'date2L1', 'date3L1', 'coup1L1', 'coup2L1', 'coup3L1')]
coup_avgs[coup_avgs$cus_caseid=='Argentina 1955-1958', 'coups_sum'] <- coup_avgs[coup_avgs$cus_caseid=='Argentina 1955-1958', 'coups_sum'] + 1

#Compute average number of coups per year within each regime
coup_avgs$pth_att_avg <- coup_avgs$coups_sum/coup_avgs$t * 365.25
cus_spells <- merge(cus_spells, coup_avgs[, c("cus_caseid", "pth_att_avg")], all.x = T)

#-----------------------------
#Coup time series for Table A8
d_mi_ts_coups <- d_mi_ts
d_mi_ts_coups$imputations <- lapply(d_mi_ts_coups$imputations, function(x){ subset(x, Year %in% 1950:2014 & cus_AR %in% 1) })
dset_coups <- subset(dat, Year %in% 1950:2014 & cus_AR %in% 1)
nrow(d_mi_ts_coups$imputations[[1]]); nrow(dset_coups)

d_mi_ts_coups$imputations <- lapply(d_mi_ts_coups$imputations, function(x){
  x$coup <- 0
  #---------------
  #Year == syear
  x[x$Year == 1994+1& x$cus_caseid=='Gambia 1994-', 'coup'] <- 1
  x[x$Year == 1975+1& x$cus_caseid=='Mozambique 1975-', 'coup'] <- 1
  x[x$Year == 1993+1& x$cus_caseid=='Nigeria 1993-1999', 'coup'] <- 1
  x[x$Year == 1992+1& x$cus_caseid=='Peru 1992-2000', 'coup'] <- 1
  x[x$Year == 1992+1& x$cus_caseid=='Sierra Leone 1992-1996', 'coup'] <- 1
  x[x$Year == 1978+1& x$cus_caseid=='Yemen 1978-', 'coup'] <- 1

  x[x$Year == 1966+1& x$cus_caseid=='Burundi 1966-1987', 'coup'] <- 1
  x[x$Year == 1967+1& x$cus_caseid=='Greece 1967-1974', 'coup'] <- 1
  x[x$Year == 1988+1& x$cus_caseid=='Haiti 1988-1990', 'coup'] <- 1
  x[x$Year == 1963+1& x$cus_caseid=='Iraq 1963-1968', 'coup'] <- 1
  x[x$Year == 2012+1& x$cus_caseid=='Mali 2012-2013', 'coup'] <- 1
  x[x$Year == 1963+1& x$cus_caseid=='Syria 1963-', 'coup'] <- 1

  x[x$Year == 1955+1& x$cus_caseid=='Argentina 1955-', 'coup'] <- 1
  
  #---------------
  #Year == eyear
  x$sdate_censored <- apply(dset_coups[, c('cus_syear','cus_smonth', 'cus_sday')], 1, function(x) paste(x[1], x[2], x[3], sep = '-'))
  x$sdate_censored[x$sdate_censored == 'NA-NA-NA'] <-NA
  x$sdate_censored <- as.Date(x$sdate_censored)
  x$sdate_censored[x$sdate_censored < '1950-01-01'] <- as.Date('1950-01-01')
  
  x$edate_censored <- apply(dset_coups[, c('cus_eyear','cus_emonth', 'cus_eday')], 1, function(x) paste(x[1], x[2], x[3], sep = '-'))
  x$edate_censored[x$edate_censored == 'NA-NA-NA'] <-NA
  x$edate_censored <- as.Date(x$edate_censored)
  x$edate_censored[!is.na(x$sdate) & is.na(x$edate_censored)] <- as.Date('2014-12-31')
  x$eyear <- year(x$edate_censored)
  x$date1 <- as.Date(dset_coups$date1, '%d%b%Y')

  x[x$Year == x$eyear & !is.na(x$date1) & x$date1 < x$edate_censored + 1, c('coup')] <- 1

  #---------------
  #Year > syear and Year < eyear
  x[x$Year < x$eyear & !is.na(x$date1), c('coup')] <- 1
    
  #---------------
  #Time since last coup
  a <- x %>%
    dplyr::group_by(cus_caseid) %>%
    dplyr::mutate(
      coup.lagged = dplyr::lag(coup, n = 1, default = 0),
      coup_evnum = cumsum(coup.lagged))# %>%
  b <- a %>%
    dplyr::group_by(cus_caseid, coup_evnum) %>%
    dplyr::mutate(syear = ifelse(Year[1]!=1950, Year[1], cus_syear[1]),
                  coup_t = Year - syear,
                  coup_tplus1 = Year - syear + 1)
  return(b)
  }
)

y <- d_mi_ts_coups$imputations[[1]]

#------------------------------------------SAVE OUTPUT------------------------------------------
#Remove non-regime years from d_mi_ts:
d_mi_ts.out <- d_mi_ts
d_mi_ts.out$imputations <- lapply(d_mi_ts.out$imputations, function(x){ subset(x, cus_AR %in% 1) })
d_mi_ts <- d_mi_ts.out

save(dat, 
  d_mi_ts, 
  d_mi_ts_coups, 
  d_mi_spells_cus, 
  d_mi_spells_gwf, 
  d_mi_spells_svol, 
  d_mi_spells_mcm, 
  d_mi_spells_N20, 
  cus_spells, 
  file = 'main_data.Rdata')



